# # -*- coding: utf-8 -*-
# import scrapy
# from scrapy import Request
# from zc_core.model.items import Box
# from zc_core.util.batch_gen import time_to_batch_no
# from crccmall.rules import *
# from datetime import datetime
# from zc_core.util.http_util import *
#
#
# class SkuSpider(BaseSpider):
#     name = 'spu'
#     # Common URLs
#     index_url = 'https://www.crccmall.com/api/newcms/indexHome/getClass'
#     # Product list page (the original note says "post"; TODO confirm the HTTP method)
#     sku_list_url = 'https://www.crccmall.com/crccmall_solr/crccGoods/searchGoodsFromSolr?current={}&pageSize=12&gc_ids=&startPrice=&endPrice=&self_flag=&orderBy=&reverse=&goods_keywords=&brand_ids=&businessModel=&businessType=&provinceQueryValueStr=&initGoodsClassId={}'
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(SkuSpider, self).__init__(*args, **kwargs)
#         if not batchNo:
#             self.batch_no = time_to_batch_no(datetime.now())
#         else:
#             self.batch_no = int(batchNo)
#         print("批次为:", self.batch_no)
#         self.page_size = 20
#         self.max_page_limit = 100
#
#     def _build_list_req(self, url, catalog1Id, catalog1Name, catalog2Id, catalog2Name, catalog3Id, catalog3Name, page):
#         return Request(
#             url=url,
#             meta={
#                 'reqType': 'spu',
#                 'batchNo': self.batch_no,
#                 'page': page,
#                 'catalog1Id': catalog1Id,
#                 'catalog1Name': catalog1Name,
#                 'catalog2Id': catalog2Id,
#                 'catalog2Name': catalog2Name,
#                 'catalog3Id': catalog3Id,
#                 'catalog3Name': catalog3Name,
#             },
#             callback=self.parse_sku_content_deal,
#             errback=self.error_back,
#             dont_filter=True,
#         )
#
#     def start_requests(self):
#         # Categories and brands
#         yield Request(
#             url=self.index_url,
#             meta={
#                 'reqType': 'spu',
#                 'batchNo': self.batch_no,
#
#             },
#             callback=self.parse_total_page,
#             errback=self.error_back,
#             dont_filter=True,
#         )
#
#     # Handle the category index: emit the catalog, then request page 1 of each level-3 category's list
#     def parse_total_page(self, response):
#         # Handle the category list
#         cats = parse_catalog(response)
#         if cats:
#             self.logger.info('品类: count[%s]' % len(cats))
#             yield Box('catalog', self.batch_no, cats)
#
#             for cat in cats:
#                 if cat.get('level') == 3:
#                     # Crawl the SKU list for this level-3 category
#                     catalog3Id = cat.get('catalogId')
#                     catalog3Name = cat.get('catalogName')
#                     catalog2Id = cat.get('parentId')
#                     catalog2Name = [i.get('catalogName') for i in cats if i.get('catalogId') == cat.get('parentId')][0]
#                     catalog1Id = [i.get('parentId') for i in cats if i.get('catalogId') == catalog2Id][0]
#                     catalog1Name = [i.get('catalogName') for i in cats if i.get('catalogId') == catalog1Id][0]
#                     yield self._build_list_req(self.sku_list_url.format(1,catalog3Id), catalog1Id, catalog1Name, catalog2Id, catalog2Name,
#                                                catalog3Id, catalog3Name, 1)
#
#     # Handle one page of the SKU list
#     def parse_sku_content_deal(self, response):
#
#         meta = response.meta
#         catalog1Name = meta.get('catalog1Name')
#         catalog1Id = meta.get('catalog1Id')
#         catalog2Name = meta.get('catalog2Name')
#         catalog2Id = meta.get('catalog2Id')
#         catalog3Name = meta.get('catalog3Name')
#         catalog3Id = meta.get('catalog3Id')
#         cur_page = meta.get('page')
#
#         # Parse the SKU list from the response
#         sku_list = parse_sku(response)
#         self.logger.info("清单: cat=%s, page=%s" % (catalog3Id, cur_page))
#         if sku_list:
#             yield Box('spu', self.batch_no, sku_list)
#             # Issue the pagination requests
#             if cur_page == 1:
#                 # Parse the total page count from the first page
#                 total_pages = parse_total_page(response)
#                 self.logger.info("清单1: cat=%s, total_page=%s" % (catalog3Id, total_pages))
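#                 # NOTE(review): the URL below is always formatted with current=1; `page` is only stored in meta, so every request refetches page 1 — fix before re-enabling
#                 for page in range(2, total_pages + 1):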
#                     yield self._build_list_req(self.sku_list_url.format(1,catalog3Id), catalog1Id, catalog1Name, catalog2Id, catalog2Name,
#                                                catalog3Id, catalog3Name, page)
#         else:
#             self.logger.info('空页: cat=%s, page=%s' % (catalog3Id, cur_page))
#

